{ "cells": [ { "cell_type": "markdown", "id": "d240914c", "metadata": {}, "source": [ "## Running SQL in Pandas" ] }, { "cell_type": "code", "execution_count": 1, "id": "c6ecee62", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandasql in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (0.7.3)\n", "Requirement already satisfied: sqlalchemy in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from pandasql) (1.4.7)\n", "Requirement already satisfied: pandas in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from pandasql) (1.2.4)\n", "Requirement already satisfied: numpy in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from pandasql) (1.22.1)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from pandas->pandasql) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from pandas->pandasql) (2021.1)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas->pandasql) (1.15.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in c:\\users\\sahil choudhary\\anaconda3\\lib\\site-packages (from sqlalchemy->pandasql) (1.0.0)\n" ] } ], "source": [ "!pip install pandasql\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 10, "id": "344bfa78", "metadata": {}, "outputs": [], "source": [ "from pandasql import sqldf\n", "pdsql = lambda q: sqldf(q, globals())\n", "\n", "# Just write the sql query in the arguments of this pdsql object" ] }, { "cell_type": "markdown", "id": "0fe6c62e", "metadata": {}, "source": [ "- Pandasql allows you to query pandas DataFrames using SQL syntax\n", "- Useful for cleaning and filtering" ] }, { "cell_type": "code", "execution_count": 3, "id": "8d500f8d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>num_legs</th>\n", " <th>num_wings</th>\n", " <th>num_specimen_seen</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>4</td>\n", " <td>0</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>8</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>8</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " num_legs num_wings num_specimen_seen\n", "0 2 2 10\n", "1 4 0 2\n", "2 8 0 1\n", "3 0 0 8" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({'num_legs': [2, 4, 8, 0],\n", " 'num_wings': [2, 0, 0, 0],\n", " 'num_specimen_seen': [10, 2, 1, 8]})\n", "df" ] }, { "cell_type": "code", "execution_count": 11, "id": "e24219f5", "metadata": {}, "outputs": [], "source": [ "df1=pdsql(\"SELECT * FROM df where num_legs>4\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "c5cd98cd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>num_legs</th>\n", " <th>num_wings</th>\n", " <th>num_specimen_seen</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>8</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " num_legs num_wings num_specimen_seen\n", "0 8 0 1" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": null, "id": "395f2568", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }